// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v $
// $Author: derrickoswald $
// $Date: 2006/06/02 02:43:25 $
// $Revision: 1.57 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.lexer;

import org.htmlparser.http.ConnectionManager;
import org.htmlparser.util.ParserException;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.UnknownHostException;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

/**
 * Represents the contents of an HTML page. Contains the source of characters
 * and an index of positions of line separators (actually the first character
 * position on the next line).
 */
public class Page implements Serializable {
    /**
     * The default charset. This should be <code>{@value}</code>, see RFC 2616
     * (http://www.ietf.org/rfc/rfc2616.txt?number=2616) section 3.7.1
     * <p/>
     * Another alias is "8859_1".
     * <p/>
     * Substituted whenever a constructor is handed a <code>null</code>
     * charset, when no source exists yet in {@link #getCharset}, and when the
     * charset announced by a connection turns out to be unsupported.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The default content type. In the absence of alternate information, assume
     * html content ({@value} ). Returned by {@link #getContentType} when there
     * is no connection or the connection has no Content-Type header.
     */
    public static final String DEFAULT_CONTENT_TYPE = "text/html";

    /**
     * Character value when the page is exhausted: <code>Source.EOF</code>
     * narrowed to a <code>char</code>. Has a value of {@value} .
     */
    public static final char EOF = (char) Source.EOF;

    /**
     * The URL this page is coming from. Set to
     * <code>connection.getURL().toExternalForm()</code> by
     * <code>setConnection()</code>, or to an arbitrary value by
     * <code>setUrl()</code>; <code>null</code> for pages built from a stream,
     * string or source.
     */
    protected String mUrl;

    /**
     * The base URL for this page, or <code>null</code> if none has been set.
     * When non-null it takes precedence over {@link #mUrl} when resolving
     * relative links in <code>getAbsoluteURL()</code>.
     */
    protected String mBaseUrl;

    /**
     * The source of characters.
     */
    protected Source mSource;

    /**
     * Character positions of the first character in each line. Recreated
     * whenever the source is replaced or reset.
     */
    protected PageIndex mIndex;

    /**
     * The connection this page is coming from or <code>null</code>. Transient:
     * it is never serialized, <code>readObject()</code> reopens it from the
     * stored URL instead.
     */
    protected transient URLConnection mConnection;

    /**
     * Connection control (proxy, cookies, authorization). Static, hence one
     * manager is shared by every page.
     */
    protected static ConnectionManager mConnectionManager = new ConnectionManager();

    /**
     * Construct an empty page, i.e. a page over the empty string that reports
     * the {@link #DEFAULT_CHARSET} as its encoding.
     */
    public Page() {
        this("", null);
    }

    /**
     * Construct a page that reads its characters from a URL connection.
     *
     * @param connection A fully conditioned connection. It does not have to be
     *                   connected yet; <code>connect()</code> is called on it
     *                   by {@link #setConnection}.
     * @throws ParserException          Wraps the failures that can occur while
     *                                  connecting or opening the input stream
     *                                  (see {@link #setConnection}).
     * @throws IllegalArgumentException If <code>connection</code> is
     *                                  <code>null</code>.
     */
    public Page(URLConnection connection) throws ParserException {
        if (connection == null) {
            throw new IllegalArgumentException("connection cannot be null");
        }
        setConnection(connection);
        // a freshly connected page has no explicit base URL
        mBaseUrl = null;
    }

    /**
     * Construct a page from a stream encoded with the given charset.
     *
     * @param stream  The source of bytes.
     * @param charset The encoding used. If null, defaults to the
     *                <code>DEFAULT_CHARSET</code>.
     * @throws UnsupportedEncodingException If the given charset is not supported.
     */
    public Page(InputStream stream, String charset) throws UnsupportedEncodingException {
        if (null == stream) throw new IllegalArgumentException("stream cannot be null");
        if (null == charset) charset = DEFAULT_CHARSET;
        mSource = new InputStreamSource(stream, charset);
        mIndex = new PageIndex(this);
        mConnection = null;
        mUrl = null;
        mBaseUrl = null;
    }

    /**
     * Construct a page from the given string.
     *
     * @param text    The HTML text.
     * @param charset <em>Optional</em>. The character set encoding that will be
     *                reported by {@link #getEncoding}. If charset is
     *                <code>null</code> the default character set is used.
     */
    public Page(String text, String charset) {
        if (null == text) throw new IllegalArgumentException("text cannot be null");
        if (null == charset) charset = DEFAULT_CHARSET;
        mSource = new StringSource(text, charset);
        mIndex = new PageIndex(this);
        mConnection = null;
        mUrl = null;
        mBaseUrl = null;
    }

    /**
     * Construct a page over the given string, reporting an encoding of
     * {@link #DEFAULT_CHARSET}.
     *
     * @param text The HTML text; must not be <code>null</code>.
     */
    public Page(String text) {
        this(text, (String) null);
    }

    /**
     * Construct a page that reads from an already built source. The page has
     * no connection, URL or base URL.
     *
     * @param source The source of characters; must not be <code>null</code>.
     * @throws IllegalArgumentException If <code>source</code> is
     *                                  <code>null</code>.
     */
    public Page(Source source) {
        if (source == null) {
            throw new IllegalArgumentException("source cannot be null");
        }

        mConnection = null;
        mUrl = null;
        mBaseUrl = null;
        mSource = source;
        mIndex = new PageIndex(this);
    }

    //
    // static methods
    //

    /**
     * Get the connection manager shared by all pages (and hence all Parsers).
     *
     * @return The current connection manager.
     */
    public static ConnectionManager getConnectionManager() {
        return mConnectionManager;
    }

    /**
     * Replace the connection manager shared by all pages.
     *
     * @param manager The new connection manager.
     */
    public static void setConnectionManager(ConnectionManager manager) {
        final ConnectionManager replacement = manager;
        mConnectionManager = replacement;
    }

    /**
     * Get a CharacterSet name corresponding to a charset parameter.
     *
     * @param content A text line of the form:
     *                <p/>
     *                <pre>
     *                text/html; charset=Shift_JIS
     *                </pre>
     *                <p/>
     *                which is applicable both to the HTTP header field Content-Type
     *                and the meta tag http-equiv="Content-Type". The parameter
     *                name is matched without regard to case (so
     *                <code>Charset=</code> and <code>CHARSET=</code> work too)
     *                and white space around the value is ignored. Note this
     *                method also handles non-compliant quoted charset
     *                directives such as:
     *                <p/>
     *                <pre>
     *                text/html; charset="UTF-8"
     *                </pre>
     *                <p/>
     *                and
     *                <p/>
     *                <pre>
     *                text/html; charset='UTF-8'
     *                </pre>
     *                May be <code>null</code>.
     * @return The character set name to use when reading the input stream. For
     *         JDKs that have the Charset class this is qualified by passing the
     *         name to findCharset() to render it into canonical form. If the
     *         charset parameter is not found in the given string, or names a
     *         character set that cannot be resolved, the encoding of the
     *         current source is returned, or the default character set if
     *         there is no source yet.
     * @see #findCharset
     * @see #DEFAULT_CHARSET
     */
    public String getCharset(String content) {
        final String CHARSET_STRING = "charset";
        int index;
        String ret;

        if (null == mSource)
            ret = DEFAULT_CHARSET;
        else
            // use existing (possibly supplied) character set:
            // bug #1322686 when illegal charset specified
            ret = mSource.getEncoding();
        if (null != content) {
            // Media type parameter names are case-insensitive, so look for the
            // first occurrence of "charset" in any letter case. regionMatches
            // is used rather than lower-casing the whole string so the index
            // found is always valid for the original text.
            index = -1;
            for (int i = 0; i <= content.length() - CHARSET_STRING.length(); i++)
                if (content.regionMatches(true, i, CHARSET_STRING, 0, CHARSET_STRING.length())) {
                    index = i;
                    break;
                }

            if (index != -1) {
                content = content.substring(index + CHARSET_STRING.length()).trim();
                if (content.startsWith("=")) {
                    content = content.substring(1).trim();
                    index = content.indexOf(";");
                    // drop any following parameters and the white space that
                    // may precede the separator
                    if (index != -1) content = content.substring(0, index).trim();

                    // remove any double quotes from around charset string
                    if (content.startsWith("\"") && content.endsWith("\"")
                            && (1 < content.length()))
                        content = content.substring(1, content.length() - 1);

                    // remove any single quote from around charset string
                    if (content.startsWith("'") && content.endsWith("'") && (1 < content.length()))
                        content = content.substring(1, content.length() - 1);

                    // canonicalize; keeps the current value if the name is bad
                    ret = findCharset(content, ret);
                }
            }
        }

        return (ret);
    }

    /**
     * Translate a character set alias into its canonical name.
     * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> The
     * lookup goes through reflection so this class still loads on JDKs that
     * lack the <code>Charset</code> class; there the name is handed back as
     * given.
     *
     * @param name     The name to look up. One of the aliases for a character set.
     * @param fallback The name to return if the character set is illegal or
     *                 unsupported.
     * @return The canonical character set name, <code>name</code> itself if
     *         the reflective lookup could not be performed, or
     *         <code>fallback</code> if the lookup was performed and failed.
     */
    public static String findCharset(String name, String fallback) {
        try {
            final Class charsetClass = Class.forName("java.nio.charset.Charset");
            // equivalent of Charset.forName (name)
            final Method lookup = charsetClass.getMethod("forName", new Class[]{String.class});
            final Object charset = lookup.invoke(null, new Object[]{name});
            // equivalent of charset.name ()
            final Method canonical = charsetClass.getMethod("name", new Class[]{});
            return (String) canonical.invoke(charset, new Object[]{});
        } catch (ClassNotFoundException cnfe) {
            // no java.nio.charset on this JVM, trust the caller's name
            return name;
        } catch (NoSuchMethodException nsme) {
            // reflection problem, trust the caller's name
            return name;
        } catch (IllegalAccessException ia) {
            // reflection problem, trust the caller's name
            return name;
        } catch (InvocationTargetException ita) {
            // the lookup itself failed, e.g. with
            // java.nio.charset.IllegalCharsetNameException or
            // java.nio.charset.UnsupportedCharsetException
            System.out.println("unable to determine cannonical charset name for " + name
                    + " - using " + fallback);
            return fallback;
        }
    }

    //
    // Serialization support
    //

    /**
     * Serialize the page. There are two modes to serializing a page based on
     * the connected state. If connected, the URL and the current offset is
     * saved, while if disconnected, the underlying source is saved.
     * <p/>
     * The fields that are temporarily altered for the benefit of
     * <code>defaultWriteObject()</code> are restored in <code>finally</code>
     * blocks, so this page is left exactly as it was whether or not
     * serialization succeeds.
     *
     * @param out The object stream to store this object in.
     * @throws IOException If there is a serialization problem.
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        final String href = getUrl();

        // two cases, reading from a URL and not
        if (null != getConnection()) {
            final Source source = getSource();
            final PageIndex index = mIndex;

            out.writeBoolean(true);
            out.writeInt(mSource.offset()); // need to preread this much
            out.writeObject(href);
            try {
                // the default fields carry the connection's URL, which is what
                // readObject() reopens; the user visible URL was written above
                setUrl(getConnection().getURL().toExternalForm());
                mSource = null; // don't serialize the source if we can avoid it
                mIndex = null; // gets recreated when the connection is reopened
                out.defaultWriteObject();
            } finally {
                mSource = source;
                mIndex = index;
                setUrl(href);
            }
        } else {
            out.writeBoolean(false);
            out.writeObject(href);
            try {
                setUrl(null); // don't try and read a bogus URL
                out.defaultWriteObject();
            } finally {
                setUrl(href);
            }
        }
    }

    /**
     * Deserialize the page. For details see <code>writeObject()</code>.
     * <p/>
     * A page that was connected when it was written reopens its URL and
     * re-reads as many characters as had been consumed, so the source offset
     * matches the serialized state. A disconnected page simply gets its
     * source back from the stream.
     *
     * @param in The object stream to decode.
     * @throws IOException            If there is a deserialization problem with the stream,
     *                                or reconnecting to or re-reading the URL fails. In the
     *                                latter case the originating <code>ParserException</code>
     *                                is attached as the cause.
     * @throws ClassNotFoundException If the deserialized class can't be located with the
     *                                current classpath and class loader.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        final boolean fromurl = in.readBoolean();

        if (fromurl) {
            final int offset = in.readInt();
            final String href = (String) in.readObject();

            // after this getUrl() yields the URL of the original connection
            in.defaultReadObject();
            try {
                // open the URL
                if (null != getUrl()) setConnection(new URL(getUrl()).openConnection());
                // consume what the original page had already consumed
                final Cursor cursor = new Cursor(this, 0);
                for (int i = 0; i < offset; i++)
                    getCharacter(cursor);
            } catch (ParserException pe) {
                // keep the original failure reachable through getCause()
                final IOException ioe = new IOException(pe.getMessage());
                ioe.initCause(pe);
                throw ioe;
            }
            // reinstate the URL the user saw, which may differ from the
            // connection's
            setUrl(href);
        } else {
            final String href = (String) in.readObject();

            in.defaultReadObject();
            setUrl(href);
        }
    }

    /**
     * Reset the page: the source of characters is reset and the line index is
     * replaced by a fresh, empty one.
     */
    public void reset() {
        final Source source = getSource();

        source.reset();
        mIndex = new PageIndex(this); // todo: is this really necessary?
    }

    /**
     * Close the page by destroying the source of characters. Does nothing if
     * the page has no source.
     *
     * @throws IOException If destroying the source encounters an error.
     */
    public void close() throws IOException {
        if (getSource() == null) {
            return;
        }
        getSource().destroy();
    }

    /**
     * Release the resources of this page when it is garbage collected, by
     * delegating to <code>close()</code>.
     *
     * @throws Throwable if <code>close()</code> throws an
     *                   <code>IOException</code>.
     */
    protected void finalize() throws Throwable {
        this.close();
    }

    /**
     * Get the connection this page reads from, if any.
     *
     * @return The connection object for this page, or <code>null</code> if
     *         this page is built from a stream, a string or a source.
     */
    public URLConnection getConnection() {
        return mConnection;
    }

    /**
     * Set the URLConnection to be used by this page. Starts reading from the
     * given connection. This also resets the current url and the line index.
     * <p/>
     * The character set is taken from the Content-Type header, falling back to
     * {@link #DEFAULT_CHARSET} if it is not supported. Bodies announced with a
     * Content-Encoding of <code>gzip</code> or <code>deflate</code> (in any
     * letter case) are transparently decompressed.
     *
     * @param connection The connection to use. It will be connected by this method.
     * @throws ParserException          If the <code>connect()</code> method fails, or an I/O
     *                                  error occurs opening the input stream.
     * @throws IllegalArgumentException If <code>connection</code> is <code>null</code>.
     */
    public void setConnection(URLConnection connection) throws ParserException {
        Stream stream;
        String charset;
        String contentEncoding;

        // fail before touching any state, as the constructor does
        if (null == connection) throw new IllegalArgumentException("connection cannot be null");
        mConnection = connection;
        try {
            getConnection().connect();
        } catch (UnknownHostException uhe) {
            throw new ParserException("Connect to " + mConnection.getURL().toExternalForm()
                    + " failed.", uhe);
        } catch (IOException ioe) {
            throw new ParserException("Exception connecting to "
                    + mConnection.getURL().toExternalForm() + " (" + ioe.getMessage() + ").", ioe);
        }
        charset = getCharset(getContentType());
        try {
            contentEncoding = connection.getContentEncoding();
            // content-coding tokens are case-insensitive; an explicit locale
            // keeps the comparison independent of the platform default
            if (null != contentEncoding)
                contentEncoding = contentEncoding.toLowerCase(java.util.Locale.ENGLISH);
            if ((null != contentEncoding) && (-1 != contentEncoding.indexOf("gzip"))) {
                stream = new Stream(new GZIPInputStream(getConnection().getInputStream()));
            } else if ((null != contentEncoding) && (-1 != contentEncoding.indexOf("deflate"))) {
                // Inflater(true): raw deflate data without the zlib header
                stream = new Stream(new InflaterInputStream(getConnection().getInputStream(),
                        new Inflater(true)));
            } else {
                stream = new Stream(getConnection().getInputStream());
            }

            try {
                mSource = new InputStreamSource(stream, charset);
            } catch (UnsupportedEncodingException uee) {
                // the announced encoding is unusable, silently read the page
                // with the default one instead
                charset = DEFAULT_CHARSET;
                mSource = new InputStreamSource(stream, charset);
            }
        } catch (IOException ioe) {
            throw new ParserException("Exception getting input stream from "
                    + mConnection.getURL().toExternalForm() + " (" + ioe.getMessage() + ").", ioe);
        }
        mUrl = connection.getURL().toExternalForm();
        mIndex = new PageIndex(this);
    }

    /**
     * Get the URL for this page. A value is only available if the page was
     * given a connection, or the URL was assigned explicitly through
     * <code>setUrl()</code>.
     *
     * @return The url of this page, or <code>null</code> if there is no
     *         connection and no URL has been set.
     */
    public String getUrl() {
        return mUrl;
    }

    /**
     * Set the URL for this page. The contents of the page are not affected,
     * only the interpretation of relative links from this point forward.
     *
     * @param url The new URL, may be <code>null</code>.
     */
    public void setUrl(String url) {
        this.mUrl = url;
    }

    /**
     * Get the base URL against which relative links are resolved.
     *
     * @return The base URL for this page, or <code>null</code> if not set.
     */
    public String getBaseUrl() {
        return mBaseUrl;
    }

    /**
     * Set the base URL against which relative links are resolved.
     *
     * @param url The base url for this page, may be <code>null</code>.
     */
    public void setBaseUrl(String url) {
        this.mBaseUrl = url;
    }

    /**
     * Get the source of characters this page is reading from.
     *
     * @return The current source.
     */
    public Source getSource() {
        return mSource;
    }

    /**
     * Try and extract the content type from the HTTP header.
     *
     * @return The raw value of the Content-Type header of the connection, or
     *         {@link #DEFAULT_CONTENT_TYPE} if there is no connection or the
     *         connection does not supply that header.
     */
    public String getContentType() {
        final URLConnection connection = getConnection();

        if (connection == null) {
            return DEFAULT_CONTENT_TYPE;
        }
        // can't use connection#getContentType
        // see Bug #1467712 Page#getCharset never works
        final String header = connection.getHeaderField("Content-Type");

        return (header == null) ? DEFAULT_CONTENT_TYPE : header;
    }

    /**
     * Read the character at the given cursor position. The cursor position can
     * be only behind or equal to the current source position. Returns end of
     * lines (EOL) as \n, by converting \r and \r\n to \n, and updates the
     * end-of-line index accordingly. Advances the cursor position by one (or
     * two in the \r\n case).
     * <p/>
     * Three cases are distinguished by comparing the cursor position with the
     * source offset: equal means a new character is pulled from the source,
     * behind means a character that was already read is fetched again, and
     * ahead is an error.
     *
     * @param cursor The position to read at.
     * @return The character at that position, and modifies the cursor to
     *         prepare for the next read. If the source is exhausted
     *         {@link #EOF} is returned and the cursor is not advanced.
     * @throws ParserException If an IOException on the underlying source occurs, or an
     *                         attempt is made to read characters in the future (the
     *                         cursor position is ahead of the underlying stream)
     */
    public char getCharacter(Cursor cursor) throws ParserException {
        int i;
        int offset;
        char ret;

        i = cursor.getPosition();
        offset = mSource.offset();
        if (offset == i)
            // the cursor is at the reading edge: consume a new character
            try {
                // from here on i holds the value read, not a position
                i = mSource.read();
                if (Source.EOF == i)
                    // exhausted: the cursor stays where it is
                    ret = EOF;
                else {
                    ret = (char) i;
                    cursor.advance();
                }
            } catch (IOException ioe) {
                throw new ParserException("problem reading a character at position "
                        + cursor.getPosition(), ioe);
            }
        else if (offset > i) {
            // historic read: the character was consumed earlier, ask the
            // source for it again by position
            try {
                ret = mSource.getCharacter(i);
            } catch (IOException ioe) {
                throw new ParserException("can't read a character at position " + i, ioe);
            }
            cursor.advance();
        } else
            // hmmm, we could skip ahead, but then what about the EOL index
            throw new ParserException("attempt to read future characters from source " + i + " > "
                    + mSource.offset());

        // handle \r
        if ('\r' == ret) { // switch to single character EOL
            ret = '\n';

            // check for a \n in the next position, which would make this a
            // two character \r\n line ending to be swallowed as a whole
            if (mSource.offset() == cursor.getPosition())
                // the following character has not been read yet: read it and
                // push it back unless it is the \n of a \r\n pair
                try {
                    i = mSource.read();
                    if (Source.EOF == i) {
                        // do nothing: a lone \r was the last character
                    } else if ('\n' == (char) i)
                        cursor.advance();
                    else
                        try {
                            mSource.unread();
                        } catch (IOException ioe) {
                            throw new ParserException("can't unread a character at position "
                                    + cursor.getPosition(), ioe);
                        }
                } catch (IOException ioe) {
                    throw new ParserException("problem reading a character at position "
                            + cursor.getPosition(), ioe);
                }
            else
                // the following character was read before: just peek at it
                try {
                    if ('\n' == mSource.getCharacter(cursor.getPosition())) cursor.advance();
                } catch (IOException ioe) {
                    throw new ParserException("can't read a character at position "
                            + cursor.getPosition(), ioe);
                }
        }
        if ('\n' == ret)
            // update the EOL index in any case; the cursor now sits on the
            // first character of the next line
            mIndex.add(cursor);

        return (ret);
    }

    /**
     * Step the cursor back over the character read last. End of lines (EOL)
     * are treated as a unit: when the character stepped over is the '\n' of a
     * '\r\n' pair the cursor retreats a second time, so the position is moved
     * back by one, or two in the \r\n case.
     *
     * @param cursor The position to 'unread' at.
     * @throws ParserException If an IOException on the underlying source occurs.
     */
    public void ungetCharacter(Cursor cursor) throws ParserException {
        cursor.retreat();
        final int position = cursor.getPosition();

        try {
            final boolean newline = '\n' == mSource.getCharacter(position);

            if (!newline || (0 == position)) {
                // an ordinary character, or nothing in front of it to inspect
                return;
            }
            if ('\r' == mSource.getCharacter(position - 1)) {
                cursor.retreat();
            }
        } catch (IOException ioe) {
            throw new ParserException("can't read a character at position " + cursor.getPosition(),
                    ioe);
        }
    }

    /**
     * Get the encoding currently in use, as reported by the source.
     *
     * @return The encoding used to convert characters.
     */
    public String getEncoding() {
        final Source source = getSource();

        return source.getEncoding();
    }

    /**
     * Begins reading from the source with the given character set, by
     * delegating to the <code>setEncoding()</code> method of the source. If
     * the current encoding is the same as the requested encoding, this method
     * is a no-op. Otherwise any subsequent characters read from this page will
     * have been decoded using the given character set.
     * <p/>
     * Some magic happens here to obtain this result if characters have already
     * been consumed from this page. Since a Reader cannot be dynamically
     * altered to use a different character set, the underlying stream is reset,
     * a new Source is constructed and a comparison made of the characters read
     * so far with the newly read characters up to the current position. If a
     * difference is encountered, or some other problem occurs, an exception is
     * thrown.
     *
     * @param character_set The character set to use to convert bytes into characters.
     * @throws ParserException If a character mismatch occurs between characters already
     *                         provided and those that would have been returned had the
     *                         new character set been in effect from the beginning. An
     *                         exception is also thrown if the underlying stream won't
     *                         put up with these shenanigans.
     */
    public void setEncoding(String character_set) throws ParserException {
        final Source source = getSource();

        source.setEncoding(character_set);
    }

    /**
     * Build a URL from the link and base provided using non-strict rules,
     * i.e. a link starting with '?' is appended to the base as its query.
     *
     * @param link The (relative) URI.
     * @param base The base URL of the page, either from the &lt;BASE&gt; tag or,
     *             if none, the URL the page is being fetched from.
     * @return An absolute URL.
     * @throws MalformedURLException If creating the URL fails.
     * @see #constructUrl(String, String, boolean)
     */
    public URL constructUrl(String link, String base) throws MalformedURLException {
        final boolean strict = false;

        return constructUrl(link, base, strict);
    }

    /**
     * Build a URL from the link and base provided.
     * <p/>
     * Besides the resolution performed by <code>java.net.URL</code>, two
     * repairs are applied to the resulting path: for links that do not start
     * with '/', leading <code>/./</code> and <code>/../</code> segments (an
     * incorrect relative link that climbs above the root) are dropped, and a
     * backslash directly following a slash is removed. Only complete
     * <code>.</code> and <code>..</code> segments are dropped; names that
     * merely begin with a dot, such as <code>/.well-known/</code>, are left
     * alone.
     *
     * @param link   The (relative) URI. An empty link resolves to the base.
     * @param base   The base URL of the page, either from the &lt;BASE&gt; tag or,
     *               if none, the URL the page is being fetched from.
     * @param strict If <code>true</code> a link starting with '?' is handled
     *               according to <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
     *               2396</a>, otherwise the common interpretation of a query
     *               appended to the base is used instead.
     * @return An absolute URL.
     * @throws MalformedURLException If creating the URL fails.
     */
    public URL constructUrl(String link, String base, boolean strict) throws MalformedURLException {
        String path;
        String stripped;
        boolean modified;
        int index;
        URL url; // constructed URL combining relative link and base

        // Bug #1461473 Relative links starting with ?
        // (startsWith rather than charAt(0), which fails on an empty link)
        if (!strict && link.startsWith("?")) { // remove query part of base if any
            if (-1 != (index = base.lastIndexOf('?'))) base = base.substring(0, index);
            url = new URL(base + link);
        } else
            url = new URL(new URL(base), link);
        path = url.getFile();
        modified = false;
        if (!link.startsWith("/")) { // we prefer to fix incorrect relative links
            // this doesn't fix them all, just the ones at the start
            stripped = stripLeadingDotSegments(path);
            if (!stripped.equals(path)) {
                path = stripped;
                modified = true;
            }
        }
        // fix backslashes
        while (-1 != (index = path.indexOf("/\\"))) {
            path = path.substring(0, index + 1) + path.substring(index + 2);
            modified = true;
        }
        if (modified) url = new URL(url, path);

        return (url);
    }

    /**
     * Remove every leading <code>/.</code> and <code>/..</code> segment from a
     * path, always leaving the result rooted at '/'.
     *
     * @param path The path (possibly followed by a query) to clean up.
     * @return The path without leading dot segments, or the path itself if it
     *         has none.
     */
    private static String stripLeadingDotSegments(String path) {
        int length;
        String rest;

        while (true) {
            if (path.startsWith("/..") && endsSegment(path, 3))
                length = 3;
            else if (path.startsWith("/.") && endsSegment(path, 2))
                length = 2;
            else
                return (path);
            rest = path.substring(length);
            // each pass shortens the path by at least one character
            path = rest.startsWith("/") ? rest : "/" + rest;
        }
    }

    /**
     * Test whether a path segment ends at the given index, that is whether the
     * index is at the end of the path, a '/', or the start of a query or
     * fragment.
     *
     * @param path  The path to examine.
     * @param index The index just past the candidate segment.
     * @return <code>true</code> if the segment is complete at that index.
     */
    private static boolean endsSegment(String path, int index) {
        char ch;

        if (index >= path.length()) return (true);
        ch = path.charAt(index);

        return (('/' == ch) || ('?' == ch) || ('#' == ch));
    }

    /**
     * Create an absolute URL from a relative link, using non-strict rules.
     *
     * @param link The relative portion of a URL.
     * @return The fully qualified URL or the original link if it was absolute
     *         already or a failure occurred.
     * @see #getAbsoluteURL(String, boolean)
     */
    public String getAbsoluteURL(String link) {
        final boolean strict = false;

        return getAbsoluteURL(link, strict);
    }

    /**
     * Create an absolute URL from a relative link. The link is resolved
     * against the base URL of the page or, if that is not set, against the URL
     * of the page.
     *
     * @param link   The relative portion of a URL.
     * @param strict If <code>true</code> a link starting with '?' is handled
     *               according to <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
     *               2396</a>, otherwise the common interpretation of a query
     *               appended to the base is used instead.
     * @return The fully qualified URL; the empty string if the link is
     *         <code>null</code> or empty; or the original link if the page
     *         has neither a base URL nor a URL, or a failure occurred.
     */
    public String getAbsoluteURL(String link, boolean strict) {
        if ((link == null) || (link.length() == 0)) {
            return "";
        }

        String base = getBaseUrl();
        if (base == null) {
            base = getUrl();
        }
        if (base == null) {
            // nothing to resolve against
            return link;
        }

        try {
            return constructUrl(link, base, strict).toExternalForm();
        } catch (MalformedURLException murle) {
            return link;
        }
    }

    /**
     * Get the line number for a cursor, as determined by the line index.
     *
     * @param cursor The character offset into the page.
     * @return The line number the character is in.
     */
    public int row(Cursor cursor) {
        final int line = mIndex.row(cursor);

        return line;
    }

    /**
     * Get the line number for a position, as determined by the line index.
     *
     * @param position The character offset into the page.
     * @return The line number the character is in.
     */
    public int row(int position) {
        final int line = mIndex.row(position);

        return line;
    }

    /**
     * Get the column number for a cursor, as determined by the line index.
     *
     * @param cursor The character offset into the page.
     * @return The character offset into the line this cursor is on.
     */
    public int column(Cursor cursor) {
        final int offset = mIndex.column(cursor);

        return offset;
    }

    /**
     * Get the column number for a position, as determined by the line index.
     *
     * @param position The character offset into the page.
     * @return The character offset into the line this position is on.
     */
    public int column(int position) {
        final int offset = mIndex.column(position);

        return offset;
    }

    /**
     * Get the text identified by the given limits.
     *
     * @param start The starting position, zero based.
     * @param end   The ending position (exclusive, i.e. the character at the
     *              ending position is not included), zero based.
     * @return The text from <code>start</code> to <code>end</code>.
     * @throws IllegalArgumentException If the source fails to supply the characters, e.g.
     *                                  because an attempt is made to get characters ahead
     *                                  of the current source offset (character position).
     *                                  The underlying <code>IOException</code> is attached
     *                                  as the cause.
     * @see #getText(StringBuffer, int, int)
     */
    public String getText(int start, int end) throws IllegalArgumentException {
        String ret;

        try {
            ret = mSource.getString(start, end - start);
        } catch (IOException ioe) {
            // the message used to run the count and "characters" together
            final IllegalArgumentException iae = new IllegalArgumentException("can't get the "
                    + (end - start) + " characters at position " + start + " - "
                    + ioe.getMessage());

            // keep the original failure reachable through getCause()
            iae.initCause(ioe);
            throw iae;
        }

        return (ret);
    }

    /**
     * Put the text identified by the given limits into the given buffer.
     * If <code>end</code> is less than <code>start</code> the two limits are
     * swapped before the characters are fetched.
     *
     * @param buffer The accumulator for the characters.
     * @param start  The starting position, zero based.
     * @param end    The ending position (exclusive, i.e. the character at the
     *               ending position is not included), zero based.
     * @throws IllegalArgumentException If an attempt is made to get characters ahead of the
     *                                  current source offset (character position), or if
     *                                  the source reports an <code>IOException</code>
     *                                  (which is then attached as the cause).
     */
    public void getText(StringBuffer buffer, int start, int end) throws IllegalArgumentException {
        final int available = mSource.offset();

        // Neither limit may lie beyond what has been read from the source.
        if ((available < start) || (available < end)) {
            throw new IllegalArgumentException("attempt to extract future characters from source "
                    + start + "|" + end + " > " + available);
        }
        if (end < start) { // limits were given in reverse order: swap them
            final int swap = end;
            end = start;
            start = swap;
        }
        final int length = end - start;
        try {
            mSource.getCharacters(buffer, start, length);
        } catch (IOException ioe) {
            // Keep the IOException as the cause so the stack trace is not lost.
            throw new IllegalArgumentException("can't get the " + length
                    + " characters at position " + start + " - " + ioe.getMessage(), ioe);
        }
    }

    /**
     * Get all text read so far from the source, i.e. everything from
     * position zero up to the current source offset.
     *
     * @return The text from the source.
     * @see #getText(StringBuffer)
     */
    public String getText() {
        final int consumed = mSource.offset();
        return getText(0, consumed);
    }

    /**
     * Put all text read so far from the source into the given buffer, i.e.
     * everything from position zero up to the current source offset.
     *
     * @param buffer The accumulator for the characters.
     * @see #getText(StringBuffer, int, int)
     */
    public void getText(StringBuffer buffer) {
        final int consumed = mSource.offset();
        getText(buffer, 0, consumed);
    }

    /**
     * Put the text identified by the given limits into the given array at the
     * specified offset. If <code>end</code> is less than <code>start</code>
     * the two limits are swapped before the characters are fetched.
     *
     * @param array  The array of characters.
     * @param offset The starting position in the array where characters are to be
     *               placed.
     * @param start  The starting position, zero based.
     * @param end    The ending position (exclusive, i.e. the character at the
     *               ending position is not included), zero based.
     * @throws IllegalArgumentException If an attempt is made to get characters ahead of the
     *                                  current source offset (character position), or if
     *                                  the source reports an <code>IOException</code>
     *                                  (which is then attached as the cause).
     */
    public void getText(char[] array, int offset, int start, int end)
            throws IllegalArgumentException {
        final int available = mSource.offset();

        // Neither limit may lie beyond what has been read from the source.
        if ((available < start) || (available < end)) {
            throw new IllegalArgumentException("attempt to extract future characters from source");
        }
        if (end < start) { // limits were given in reverse order: swap them
            final int swap = end;
            end = start;
            start = swap;
        }
        try {
            // The source is handed the start and end positions directly,
            // so no separate length needs to be computed here.
            mSource.getCharacters(array, offset, start, end);
        } catch (IOException ioe) {
            // Keep the IOException as the cause so the stack trace is not lost.
            throw new IllegalArgumentException("can't get the " + (end - start)
                    + " characters at position " + start + " - " + ioe.getMessage(), ioe);
        }
    }

    /**
     * Get the text line the position of the cursor lies on.
     * <p>
     * The limits of the line are taken from the page's line index; when the
     * index has no further entry after the line's start, the current source
     * offset is used as the end of the line.
     * <p>
     * NOTE(review): this assumes <code>mIndex.elementAt</code> only accepts
     * indices from 0 to <code>mIndex.size() - 1</code> -- confirm against the
     * index implementation.
     *
     * @param cursor The position to calculate for.
     * @return The contents of the URL or file corresponding to the line number
     *         containing the cursor position.
     */
    public String getLine(Cursor cursor) {
        int line = row(cursor);
        final int size = mIndex.size();
        final int start;
        final int end;

        if (line < size) {
            start = mIndex.elementAt(line);
            line++;
            // Only consult the next index entry when one exists; otherwise the
            // line runs to the end of what has been read so far. (The former
            // test "line <= size" was always true here, so the fallback could
            // never be taken and an index equal to size() was requested.)
            end = (line < size) ? mIndex.elementAt(line) : mSource.offset();
        } else {
            // current line
            // With line == 0 there is no previous entry to look up, so the
            // line is taken to start at the beginning of the page.
            start = (0 < line) ? mIndex.elementAt(line - 1) : 0;
            end = mSource.offset();
        }

        return getText(start, end);
    }

    /**
     * Get the text line the given position lies on.
     * Wraps the position in a {@link Cursor} on this page and delegates to
     * {@link #getLine(Cursor)}.
     *
     * @param position The position to calculate for.
     * @return The contents of the URL or file corresponding to the line number
     *         containing the cursor position.
     */
    public String getLine(int position) {
        final Cursor cursor = new Cursor(this, position);
        return getLine(cursor);
    }

    /**
     * Display some of this page as a string.
     * <p>
     * At most the last 40 characters read from the source are shown. The
     * text is prefixed with <code>"..."</code> only when earlier characters
     * were actually left out. If nothing has been read yet, the default
     * <code>Object.toString()</code> representation is returned.
     *
     * @return The last few characters the source read in.
     */
    public String toString() {
        final int offset = mSource.offset();

        // Nothing read so far: fall back to the default representation.
        if (offset <= 0) {
            return super.toString();
        }

        // Room for 40 characters of text plus the three character ellipsis.
        final StringBuffer buffer = new StringBuffer(43);
        int start = offset - 40;
        if (start > 0) {
            // Characters before 'start' are omitted, so mark the truncation.
            buffer.append("...");
        } else {
            // Everything read so far fits; show it all without an ellipsis.
            start = 0;
        }
        getText(buffer, start, offset);

        return buffer.toString();
    }
}
